#!/usr/bin/env python3
# =============================================================================
# @file    convert_loyola_oracle
# @brief   Take the Loyola U. Delaware identifier "oracle" set and convert it.
# @author  Michael Hucka
# @license Please see the file named LICENSE in the project directory
# @website https://github.com/casics/extractor
#
# This was used to create the file "ludiso.tsv" in ../tests/data.  It is
# meant to be run on the text file from the "Loyola University of Delaware
# Identifier Splitting Oracle", available from the website at
# http://www.cs.loyola.edu/~binkley/ludiso/. (An archived copy of the web page
# and original data file is available in ../tests/data/archived/ludiso/.)
#
# Example invocation:
# ./convert_loyola_oracle -i ludiso.txt -o ludiso.tsv
# =============================================================================

import csv
import math
import os
import pickle
import re
import sys

import plac

# Make the parent directory of this script importable.  The path is built
# relative to this file so that it does not depend on the current working
# directory of the caller.
try:
    sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
except NameError:
    # __file__ is not defined (e.g., when the code is pasted into an
    # interactive session), so fall back to a path relative to the cwd.
    sys.path.append("..")


# Main
# .............................................................................

@plac.annotations(
    debug      = ('drop into ipdb opening files',         'flag',   'd'),
    inputfile  = ('input text file',                      'option', 'i'),
    outputfile = ('output file',                          'option', 'o'),
    pickle     = ('output as pickle file (default: csv)', 'flag',   'p'),
)

def convert_file(inputfile=None, outputfile=None, debug=False, pickle=False):
    """Convert an identifier-splitting oracle text file to TSV or pickle.

    Every non-blank line of the input is split on whitespace.  The second
    field is used as the identifier and the seventh field as its expected
    split, whose parts are separated by '-'.  Any fields after the seventh
    are ignored.  If an identifier occurs more than once, the last
    occurrence wins.

    Args:
        inputfile: Path of the text file to read.
        outputfile: Path of the file to write.
        debug: If true, drop into ipdb before any file is opened.
        pickle: If true, write the identifier -> parts dictionary as a
            pickle file.  Otherwise write one line per identifier of the
            form "<identifier><TAB><part>,<part>,...".

    Errors (missing arguments, unreadable files, lines with fewer than
    seven fields) are caught and printed rather than raised.
    """
    # The `pickle` flag shadows the module of the same name within this
    # function, so the module has to be reachable under a different name.
    # The parameter itself cannot be renamed: it defines the command-line
    # option.
    import pickle as pickle_module

    expected = {}
    try:
        if debug:
            import ipdb; ipdb.set_trace()
        if inputfile is None or outputfile is None:
            raise ValueError('both an input file (-i) and an output file (-o)'
                             ' must be given')
        total = 0
        with open(inputfile, 'r') as infile:
            for line in infile:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g., at the end of the file)
                    # instead of aborting the whole conversion.
                    continue
                (_, token, _, _, _, _, result, *_) = fields
                expected[token] = result.split('-')
                total += 1
        if pickle:
            with open(outputfile, 'wb') as pickle_file:
                pickle_module.dump(expected, pickle_file)
            print('{} tokens written to {}'.format(total, outputfile))
        else:
            with open(outputfile, 'w') as tsv_file:
                for token, parts in expected.items():
                    tsv_file.write(token + '\t' + ','.join(parts) + '\n')
    except Exception as err:
        # This is the command-line boundary: report the problem to the user
        # rather than showing a traceback.
        print(err)


# Run the converter only when this file is executed as a script; plac invokes
# convert_file with the arguments it parses from the command line.
if __name__ == '__main__':
    plac.call(convert_file)
